* same as organize_HS_improved.do plus (1) city mix-up corrected, and (2) use of weather data with GMT adjustment
* working directory should be set to the folder into which my repository has been extracted

* Temporary files used throughout this script:
*   courts          - court coordinates (first the corrections entered below, later the full corrected list)
*   weather_collect - accumulator for the merged pollution/weather measurements
tempfile courts
tempfile weather_collect

* THIS IS THE NEW PART relative to organize_HS_improved.do (which is not part of the Dataverse; it is in turn
* inspired by Heyes & Saberian's organize.do): corrected coordinates for some courts.
* The rows are matched to courtgps on city and state further below, and that merge asserts that every row
* entered here finds a counterpart, so city and state must stay spelled exactly as entered.
clear
input str13 city str13 state latitude longitude
"ARLINGTON"     "VA"          38.856464   -77.050461
"PHILADELPHIA"  "PA"          39.950620   -75.155489
"SAN DIEGO"     "CA"          32.718592   -117.166867
"ELIZABETH"     "New Jersey"  40.666285   -74.189695
"OTAY MESA"     "CA"          32.575133   -116.914764
"RENO"          "NEVADA"      39.462365   -119.777628
"SAN ANTONIO"   "TX"          29.423967   -98.498831
"OAKDALE"       ""            30.828756   -92.640713
end
save `courts'

* Load the original court coordinates and strip surrounding blanks from the two merge keys
use "data/original_article/data/Data/out/courtgps", clear
foreach key of varlist city state {
	replace `key' = strtrim(`key')
}

* Overwrite the coordinates of the corrected courts.
* assert(master match_conflict) is the worded form of assert(1 5): every court is either untouched (master only)
* or had differing nonmissing values replaced by the correction; anything else aborts the script.
merge 1:1 city state using "`courts'", update replace assert(master match_conflict) nogenerate

* city alone must identify a court, so later steps need not differentiate by state
isid city

* From here on the procedure is as before, but restricted to ozone, co, pm and the hourly weather data;
* the daily measures are left out because they are not used.

* Suffix the court coordinates with 2 so they stay distinct from the coordinates in the measurement files
rename (latitude longitude) (latitude2 longitude2)
save "`courts'", replace

// For each measurement type: pair every measurement with every court, keep the nearest measurement
// location per court and date, stack the years, and merge the result into weather_collect.
foreach feature in ozone co pm hourlyweather {
	tempfile `feature'

	forvalues year = 2000/2004 {
		// co2001 is skipped because that file actually holds data for 2002
		if "`feature'`year'" != "co2001" {
			if "`feature'" == "hourlyweather" {
				use "data/original_article/reconstructed_data/hourlyweather_vargen_GMTadj_nogaps_HS.dta" if year(date)==`year', clear
			}
			else {
				use "data/original_article/data/Data/environment/Pollution/`feature'`year'", clear
			}

			// all pairwise combinations of measurements and courts, with the distance between them
			cross using "`courts'"
			geodist latitude longitude latitude2 longitude2, gen(d`feature')
			// no point keeping measure-specific lat/long: the loop merges together different
			// measurements that come with different lat/long
			drop latitud* longitud*
			if "`feature'" != "hourlyweather" {
				drop cityname
			}

			// keep only the closest measurement location(s) per court and date ...
			bys city state date: egen double mind = min(d`feature')
			keep if mind == d`feature'
			drop mind
			// ... and average over equidistant locations so that one row per court and date remains
			ds city state date d`feature', not
			collapse `r(varlist)', by(city state date d`feature')

			// Append the years processed so far. The accumulator file does not exist in the first
			// round; test for that explicitly instead of wrapping append in capture, which would
			// also hide genuine append errors and silently drop the earlier years.
			capture confirm file "``feature''"
			if _rc == 0 {
				append using "``feature''"
			}
			save "``feature''", replace
		}
	}

	// fail early if dirty data (cf. co2001 above) created duplicate court-date rows
	isid city state date, missok

	// Merge with the features collected so far. As above, only the nonexistence of the accumulator
	// in the first round is tolerated; a failing merge now stops the script instead of silently
	// overwriting weather_collect with the current feature alone.
	capture confirm file "`weather_collect'"
	if _rc == 0 {
		merge 1:1 city state date using "`weather_collect'", nogen
	}
	save "`weather_collect'", replace
}

* Store the data currently in memory (the court-by-date measurements assembled above)
save "data/original_article/reconstructed_data/weather_HS_improved_citymixup_corrected", replace

* Add the sky-distance data; assert(match) is the worded form of assert(3), i.e. every observation on both sides must match.
* NOTE(review): this path spells the folder "Environment", the pollution files above use "environment" --
* confirm the actual folder name, since only one spelling can work on a case-sensitive file system.
merge 1:1 city date using "data/original_article/data/Data/Environment/Weather/skydistance.dta", assert(match) nogenerate

* Attach the asylum records (possibly several per court and date) and keep matched observations only
merge 1:m date city using "data/original_article/data/Data/raw/asylum", nogenerate keep(match)
save "data/original_article/reconstructed_data/matched_HS_improved_corrections", replace